#### Produce the relationship between parameter values and cost-EDD sensitivity

### Prepare environment

setwd("~/research/coffee-dataverse/src")
do.origspec <- T

source("load.R", encoding="iso-8859-1")

output.suffix <- '-eq24-orig'
df$price.delayed <- get.price.delayed(df)

generate.obs <- function() {
    newdf <- data.frame(edd1000=df$edd1000, yield=df$total.produce/df$total.harvest, price=df$price.delayed)
    subset(newdf, !is.na(edd1000) & !is.na(yield) & !is.na(price))
}

param <- get.fit.params(output.suffix)
load("../data/combined-betagamma-eq24-orig.RData")

yieldrange <- mean(la$gamma[, 1, which(param == 'yield_uncertainty')])
coeff.edd1000 <- mean(la$gamma[, 1, which(param == 'yield_coeff3')])
cost.harvest <- mean(la$gamma[, 1, which(param == 'cost_harvest')])

## Set up likelihood functions

LL <- function(mu, kappa, cost) {
    trueyield <- exp(mu + kappa*newdf$edd1000)

    harvest <- pmax(0, pmin((trueyield * (1 + yieldrange) - cost / newdf$price) / (2*yieldrange * trueyield), 1))
    product <- pmax(0, pmin(((trueyield * (1 + yieldrange))^2 - (cost / newdf$price)^2) / (4*yieldrange * trueyield), trueyield))
    obsyield <- product / harvest
    obsyield[!is.finite(obsyield)] <- NA

    sigma <- sd(newdf$yield)
    -sum(dnorm(newdf$yield, obsyield, sigma, log=T), na.rm=T) + all(harvest == 1) * abs(mu + kappa + cost)
}

library(stats4)

newdf <- generate.obs()
yieldlimit <- log(max(newdf$yield))

generate.varcost <- function(copa) {
    newdf <- generate.obs()
    mu <- -0.1766872
    kappa <- coeff.edd1000
    cost <- cost.harvest + copa * (newdf$edd1000 - mean(newdf$edd1000))

    trueyield <- exp(mu + kappa*newdf$edd1000)
    harvest <- pmax(0, pmin((trueyield * (1 + yieldrange) - cost / newdf$price) / (2*yieldrange * trueyield), 1))
    product <- pmax(0, pmin(((trueyield * (1 + yieldrange))^2 - (cost / newdf$price)^2) / (4*yieldrange * trueyield), trueyield))
    obsyield <- product / harvest
    obsyield[!is.finite(obsyield)] <- NA

    newdf$yield <- obsyield
    subset(newdf, !is.na(yield))
}

## Calculate maximum likelihoods

results <- matrix(nrow=0, ncol=3)
for (copa in seq(-5, 5, length.out=100)) {
    print(copa)
    newdf <- generate.varcost(copa)
    mle(LL, start=list(mu=-0.1766834, kappa=coeff.edd1000, cost=cost.harvest), method = "L-BFGS-B", lower=c(-yieldlimit, -10, 0),
        upper = c(yieldlimit, 10, 1))

    fit <- tryCatch({
        mle(LL, start=list(mu=-0.1766834, kappa=coeff.edd1000, cost=cost.harvest), method = "L-BFGS-B", lower=c(-yieldlimit, -10, 0),
            upper = c(yieldlimit, 10, 1))
    }, error=function(e) {
        NULL
    })
    if (!is.null(fit))
        results <- rbind(results, attributes(fit)$coef)
    else
        results <- rbind(results, rep(NA, 3))
    print(results)
}

## Plot graphs

library(ggplot2)

mod.base <- get.model.base(df)

resdf <- data.frame(copa=rep(seq(-5, 5, length.out=100), 3), value=c(results[,1], results[,2], results[,3]), group=rep(c('Baseline yield', 'EDD yield marginal', 'Harvest cost (1000s $/Ha)'), each=100))
knowndf <- data.frame(value=c(mean(log(generate.obs()$yield) - coeff.edd1000 * generate.obs()$edd1000), coeff.edd1000, cost.harvest), group=c('Baseline yield', 'EDD yield marginal', 'Harvest cost (1000s $/Ha)'))
ribdf <- data.frame(copa=rep(seq(-5, 5, length.out=100), 3),
                    cilo=rep(c(mean(log(generate.obs()$yield) - quantile(la$gamma[, 1, which(param == 'yield_coeff3')], .25) * generate.obs()$edd1000), quantile(la$gamma[, 1, which(param == 'yield_coeff3')], .25), quantile(la$gamma[, 1, which(param == 'cost_harvest')], .25)), each=100),
                    cihi=rep(c(mean(log(generate.obs()$yield) - quantile(la$gamma[, 1, which(param == 'yield_coeff3')], .75) * generate.obs()$edd1000), quantile(la$gamma[, 1, which(param == 'yield_coeff3')], .75), quantile(la$gamma[, 1, which(param == 'cost_harvest')], .75)), each=100),
                    group=rep(knowndf$group, each=100))

ggplot(subset(resdf, group != "Baseline yield"), aes(x=copa)) +
    facet_grid(group ~ ., scales="free") +
    geom_line(aes(y=value)) +
    geom_hline(data=subset(knowndf, group != "Baseline yield"), aes(yintercept=value), colour='blue') +
    geom_ribbon(data=subset(ribdf, group != "Baseline yield"), aes(x=copa, ymin=cilo, ymax=cihi), alpha=.5, fill='blue') +
    geom_hline(data=data.frame(value=mod.base$coeff[3], group='EDD yield marginal'), aes(yintercept=value), colour='red') +
    xlab("Marginal effect of EDDs on cost") + ylab("") +
    theme_bw() + scale_x_continuous(expand=c(0, 0)) + ggtitle("Comparison of parameters under cost-EDD sensitivity")
ggsave("../figures/alts/costkdds.pdf", width=6.5, height=4)
